## Warning: package 'data.table' was built under R version 4.1.1
## Warning: package 'ggplot2' was built under R version 4.1.1
## Warning: package 'ggthemes' was built under R version 4.1.1
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Warning: package 'tseries' was built under R version 4.1.1
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'tidyr' was built under R version 4.1.1
## Warning: package 'stringr' was built under R version 4.1.1
## Warning: package 'plotly' was built under R version 4.1.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the training CSV into a data.table. The first line is used as the
# header, and the fields "NA", "?" and "" are all read as missing values.
dt_org <- fread(
  "train_1.csv",
  header = TRUE,
  data.table = TRUE,
  na.strings = c("NA", "?", "")
)

# Preview the first 10 rows of the loaded table.
head(dt_org, n = 10)
## Page 2015-07-01 2015-07-02
## 1: 2NE1_zh.wikipedia.org_all-access_spider 18 11
## 2: 2PM_zh.wikipedia.org_all-access_spider 11 14
## 3: 3C_zh.wikipedia.org_all-access_spider 1 0
## 4: 4minute_zh.wikipedia.org_all-access_spider 35 13
## 5: 52_Hz_I_Love_You_zh.wikipedia.org_all-access_spider NA NA
## 6: 5566_zh.wikipedia.org_all-access_spider 12 7
## 7: 91Days_zh.wikipedia.org_all-access_spider NA NA
## 8: A'N'D_zh.wikipedia.org_all-access_spider 118 26
## 9: AKB48_zh.wikipedia.org_all-access_spider 5 23
## 10: ASCII_zh.wikipedia.org_all-access_spider 6 3
## 2015-07-03 2015-07-04 2015-07-05 2015-07-06 2015-07-07 2015-07-08
## 1: 5 13 14 9 9 22
## 2: 15 18 11 13 22 11
## 3: 1 1 0 4 0 3
## 4: 10 94 4 26 14 9
## 5: NA NA NA NA NA NA
## 6: 4 5 20 8 5 17
## 7: NA NA NA NA NA NA
## 8: 30 24 29 127 53 37
## 9: 14 12 9 9 35 15
## 10: 5 12 6 5 4 13
## 2015-07-09 2015-07-10 2015-07-11 2015-07-12 2015-07-13 2015-07-14
## 1: 26 24 19 10 14 15
## 2: 10 4 41 65 57 38
## 3: 4 4 1 1 1 6
## 4: 11 16 16 11 23 145
## 5: NA NA NA NA NA NA
## 6: 24 7 12 11 7 9
## 7: NA NA NA NA NA NA
## 8: 20 32 17 23 47 33
## 9: 14 22 8 16 18 12
## 10: 9 15 18 7 8 12
## 2015-07-15 2015-07-16 2015-07-17 2015-07-18 2015-07-19 2015-07-20
## 1: 8 16 8 8 16 7
## 2: 20 62 44 15 10 47
## 3: 8 6 4 5 1 2
## 4: 14 17 85 4 30 22
## 5: NA NA NA NA NA NA
## 6: 6 10 8 13 3 14
## 7: NA NA NA NA NA NA
## 8: 47 58 29 187 128 34
## 9: 14 14 7 7 20 82
## 10: 25 23 6 10 7 3
## 2015-07-21 2015-07-22 2015-07-23 2015-07-24 2015-07-25 2015-07-26
## 1: 11 10 20 18 15 14
## 2: 24 17 22 9 39 13
## 3: 3 8 8 6 6 2
## 4: 9 10 11 7 7 11
## 5: NA NA NA NA NA NA
## 6: 4 9 14 10 8 3
## 7: NA NA NA NA NA NA
## 8: 38 8 38 17 45 14
## 9: 8 17 18 15 23 11
## 10: 17 10 16 10 7 6
## 2015-07-27 2015-07-28 2015-07-29 2015-07-30 2015-07-31 2015-08-01
## 1: 49 10 16 18 8 5
## 2: 11 12 21 19 9 15
## 3: 2 3 2 4 3 3
## 4: 9 11 44 8 14 19
## 5: NA NA NA NA NA NA
## 6: 74 17 8 6 9 3
## 7: NA NA NA NA NA NA
## 8: 15 56 30 15 115 6
## 9: 20 35 20 11 13 11
## 10: 15 12 32 7 10 5
## 2015-08-02 2015-08-03 2015-08-04 2015-08-05 2015-08-06 2015-08-07
## 1: 9 7 13 9 7 4
## 2: 33 8 8 7 13 2
## 3: 5 3 5 4 2 5
## 4: 10 17 17 10 7 10
## 5: NA NA NA NA NA NA
## 6: 10 21 9 5 3 4
## 7: NA NA NA NA NA NA
## 8: 25 10 135 40 63 32
## 9: 13 20 47 25 93 13
## 10: 13 22 15 12 9 3
## 2015-08-08 2015-08-09 2015-08-10 2015-08-11 2015-08-12 2015-08-13
## 1: 11 10 5 9 9 9
## 2: 23 12 27 27 36 23
## 3: 1 4 5 0 0 7
## 4: 1 8 27 19 16 2
## 5: NA NA NA NA NA NA
## 6: 1 7 3 15 5 6
## 7: NA NA NA NA NA NA
## 8: 35 65 14 21 35 50
## 9: 18 24 8 13 6 12
## 10: 12 6 4 6 4 4
## 2015-08-14 2015-08-15 2015-08-16 2015-08-17 2015-08-18 2015-08-19
## 1: 9 13 4 15 25 9
## 2: 58 80 60 69 42 161
## 3: 3 5 1 6 2 5
## 4: 84 22 14 47 25 14
## 5: NA NA NA NA NA NA
## 6: 9 4 6 7 9 11
## 7: NA NA NA NA NA NA
## 8: 49 38 12 37 87 66
## 9: 5 44 15 13 19 12
## 10: 16 11 8 5 5 6
## 2015-08-20 2015-08-21 2015-08-22 2015-08-23 2015-08-24 2015-08-25
## 1: 5 6 20 3 14 46
## 2: 94 77 78 20 24 13
## 3: 0 3 1 0 1 1
## 4: 11 12 27 8 17 43
## 5: NA NA NA NA NA NA
## 6: 11 2 3 7 23 4
## 7: NA NA NA NA NA NA
## 8: 90 64 402 86 90 30
## 9: 6 11 12 24 27 60
## 10: 12 9 15 10 10 15
## 2015-08-26 2015-08-27 2015-08-28 2015-08-29 2015-08-30 2015-08-31
## 1: 5 5 13 4 9 10
## 2: 14 26 8 82 22 11
## 3: 2 4 2 1 1 3
## 4: 3 19 14 20 43 4
## 5: NA NA NA NA NA NA
## 6: 8 8 10 5 8 4
## 7: NA NA NA NA NA NA
## 8: 86 61 7 44 7 15
## 9: 11 18 10 15 12 37
## 10: 5 7 9 7 9 9
## 2015-09-01 2015-09-02 2015-09-03 2015-09-04 2015-09-05 2015-09-06
## 1: 9 11 11 11 9 15
## 2: 81 37 9 40 47 18
## 3: 4 3 6 6 4 3
## 4: 5 37 23 14 12 13
## 5: NA NA NA NA NA NA
## 6: 4 8 18 6 4 9
## 7: NA NA NA NA NA NA
## 8: 11 61 9 23 31 154
## 9: 18 15 7 25 20 14
## 10: 7 12 6 9 9 9
## 2015-09-07 2015-09-08 2015-09-09 2015-09-10 2015-09-11 2015-09-12
## 1: 5 10 7 4 8 9
## 2: 23 6 2 7 16 10
## 3: 3 2 9 7 2 3
## 4: 22 12 12 6 27 5
## 5: NA NA NA NA NA NA
## 6: 6 8 5 11 5 10
## 7: NA NA NA NA NA NA
## 8: 11 16 5 6 7 5
## 9: 14 14 13 7 15 12
## 10: 26 2 7 15 10 8
## 2015-09-13 2015-09-14 2015-09-15 2015-09-16 2015-09-17 2015-09-18
## 1: 10 6 13 16 6 24
## 2: 34 14 31 20 23 14
## 3: 1 3 1 6 7 1
## 4: 7 24 8 9 10 12
## 5: NA NA NA NA NA NA
## 6: 25 29 3 10 9 16
## 7: NA NA NA NA NA NA
## 8: 4 9 12 31 11 11
## 9: 15 7 15 32 15 23
## 10: 2 17 9 24 23 14
## 2015-09-19 2015-09-20 2015-09-21 2015-09-22 2015-09-23 2015-09-24
## 1: 9 11 12 8 14 6
## 2: 16 34 15 30 13 30
## 3: 2 5 2 3 8 5
## 4: 19 7 7 18 15 7
## 5: NA NA NA NA NA NA
## 6: 13 7 22 9 9 17
## 7: NA NA NA NA NA NA
## 8: 10 21 7 9 5 1
## 9: 78 107 25 55 7 41
## 10: 14 13 21 24 19 18
## 2015-09-25 2015-09-26 2015-09-27 2015-09-28 2015-09-29 2015-09-30
## 1: 6 11 14 6 10 20
## 2: 15 25 17 8 12 17
## 3: 0 4 1 5 3 0
## 4: 9 10 9 14 8 17
## 5: NA NA NA NA NA NA
## 6: 10 24 13 23 12 2
## 7: NA NA NA NA NA NA
## 8: 68 9 4 10 44 34
## 9: 31 25 22 14 16 22
## 10: 13 14 14 15 10 16
## 2015-10-01 2015-10-02 2015-10-03 2015-10-04 2015-10-05 2015-10-06
## 1: 7 15 8 15 5 8
## 2: 10 21 18 30 13 7
## 3: 1 8 2 1 3 0
## 4: 6 8 7 5 3 9
## 5: NA NA NA NA NA NA
## 6: 14 13 25 13 12 11
## 7: NA NA NA NA NA NA
## 8: 59 53 12 71 26 79
## 9: 20 26 21 25 53 22
## 10: 13 13 14 17 11 6
## 2015-10-07 2015-10-08 2015-10-09 2015-10-10 2015-10-11 2015-10-12
## 1: 8 5 11 165 34 6
## 2: 15 23 20 15 9 47
## 3: 0 5 3 3 0 2
## 4: 5 6 8 8 11 6
## 5: NA NA NA NA NA NA
## 6: 8 5 30 6 10 10
## 7: NA NA NA NA NA NA
## 8: 21 80 43 32 72 62
## 9: 20 15 32 25 40 26
## 10: 25 21 14 15 7 12
## 2015-10-13 2015-10-14 2015-10-15 2015-10-16 2015-10-17 2015-10-18
## 1: 13 8 9 11 26 18
## 2: 14 11 16 12 7 15
## 3: 5 2 5 10 5 6
## 4: 7 28 15 8 7 7
## 5: NA NA NA NA NA NA
## 6: 10 6 8 13 21 29
## 7: NA NA NA NA NA NA
## 8: 12 6 14 4 69 25
## 9: 14 19 28 25 23 35
## 10: 23 16 12 16 8 10
## 2015-10-19 2015-10-20 2015-10-21 2015-10-22 2015-10-23 2015-10-24
## 1: 3 5 12 6 16 19
## 2: 14 12 18 29 39 11
## 3: 1 4 4 1 3 13
## 4: 12 5 11 3 7 23
## 5: NA NA NA NA NA NA
## 6: 38 6 22 10 6 11
## 7: NA NA NA NA NA NA
## 8: 18 3 8 5 12 18
## 9: 28 11 25 19 21 28
## 10: 18 16 22 12 15 13
## 2015-10-25 2015-10-26 2015-10-27 2015-10-28 2015-10-29 2015-10-30
## 1: 9 10 11 11 7 9
## 2: 14 28 17 20 17 36
## 3: 2 1 3 2 1 10
## 4: 6 3 8 8 39 4
## 5: NA NA NA NA NA NA
## 6: 13 6 8 14 16 16
## 7: NA NA NA NA NA NA
## 8: 8 45 13 3 19 43
## 9: 29 36 63 61 38 38
## 10: 16 19 21 23 25 12
## 2015-10-31 2015-11-01 2015-11-02 2015-11-03 2015-11-04 2015-11-05
## 1: 10 24 6 6 8 16
## 2: 13 11 14 14 14 33
## 3: 5 6 2 5 2 2
## 4: 10 6 8 9 16 9
## 5: NA NA NA NA NA NA
## 6: 4 12 7 9 9 8
## 7: NA NA NA NA NA NA
## 8: 31 15 40 63 27 3
## 9: 48 70 30 39 19 38
## 10: 15 14 17 16 13 14
## 2015-11-06 2015-11-07 2015-11-08 2015-11-09 2015-11-10 2015-11-11
## 1: 13 10 10 6 5 20
## 2: 14 13 18 13 11 8
## 3: 3 2 6 3 2 1
## 4: 8 8 7 5 5 12
## 5: NA NA NA NA NA NA
## 6: 11 8 17 7 4 11
## 7: NA NA NA NA NA NA
## 8: 29 20 12 12 2 5
## 9: 44 25 32 33 23 16
## 10: 12 9 13 18 20 7
## 2015-11-12 2015-11-13 2015-11-14 2015-11-15 2015-11-16 2015-11-17
## 1: 6 47 9 9 12 11
## 2: 10 11 81 14 20 6
## 3: 2 3 1 1 2 2
## 4: 8 15 9 12 5 7
## 5: NA NA NA NA NA NA
## 6: 8 4 3 22 9 6
## 7: NA NA NA NA NA NA
## 8: 18 6 25 13 57 5
## 9: 13 20 31 16 24 24
## 10: 15 21 14 13 25 19
## 2015-11-18 2015-11-19 2015-11-20 2015-11-21 2015-11-22 2015-11-23
## 1: 17 15 14 11 97 11
## 2: 16 18 9 12 10 8
## 3: 3 2 2 5 7 2
## 4: 6 12 7 6 33 5
## 5: NA NA NA NA NA NA
## 6: 13 12 12 8 7 25
## 7: NA NA NA NA NA NA
## 8: 4 1 110 93 142 114
## 9: 22 15 43 34 37 41
## 10: 19 23 32 20 9 14
## 2015-11-24 2015-11-25 2015-11-26 2015-11-27 2015-11-28 2015-11-29
## 1: 12 11 14 15 12 104
## 2: 11 14 47 13 13 6
## 3: 3 4 6 1 3 6
## 4: 11 6 4 32 9 17
## 5: NA NA NA NA NA NA
## 6: 7 11 9 5 21 6
## 7: NA NA NA NA NA NA
## 8: 140 37 6 30 96 18
## 9: 50 59 46 72 31 28
## 10: 16 21 26 14 19 24
## 2015-11-30 2015-12-01 2015-12-02 2015-12-03 2015-12-04 2015-12-05
## 1: 5 22 45 75 29 34
## 2: 10 8 8 8 18 31
## 3: 3 3 4 2 2 4
## 4: 2 10 10 5 7 11
## 5: NA NA NA NA NA NA
## 6: 12 5 12 9 7 11
## 7: NA NA NA NA NA NA
## 8: 66 25 3 9 153 246
## 9: 17 42 54 59 13 23
## 10: 14 15 21 17 27 11
## 2015-12-06 2015-12-07 2015-12-08 2015-12-09 2015-12-10 2015-12-11
## 1: 20 12 25 9 62 20
## 2: 16 15 10 13 9 32
## 3: 3 1 5 5 4 2
## 4: 8 10 6 17 11 20
## 5: NA NA NA NA NA NA
## 6: 73 14 4 12 11 5
## 7: NA NA NA NA NA NA
## 8: 100 38 40 24 6 7
## 9: 31 44 40 32 27 28
## 10: 21 16 27 23 28 23
## 2015-12-12 2015-12-13 2015-12-14 2015-12-15 2015-12-16 2015-12-17
## 1: 19 8 23 13 16 34
## 2: 161 6 20 8 11 13
## 3: 4 5 4 2 1 6
## 4: 11 15 18 10 15 12
## 5: NA NA NA NA NA NA
## 6: 20 7 6 9 17 14
## 7: NA NA NA NA NA NA
## 8: 21 88 57 21 16 34
## 9: 31 39 15 56 131 27
## 10: 21 18 35 29 17 25
## 2015-12-18 2015-12-19 2015-12-20 2015-12-21 2015-12-22 2015-12-23
## 1: 36 11 18 12 24 30
## 2: 8 19 7 9 16 11
## 3: 1 1 3 1 3 5
## 4: 12 12 8 13 9 11
## 5: NA NA NA NA NA NA
## 6: 17 10 16 8 8 14
## 7: NA NA NA NA NA NA
## 8: 18 27 13 80 26 7
## 9: 40 27 32 25 38 36
## 10: 28 20 15 22 19 25
## 2015-12-24 2015-12-25 2015-12-26 2015-12-27 2015-12-28 2015-12-29
## 1: 27 44 35 53 11 26
## 2: 6 38 11 17 13 12
## 3: 3 3 0 5 3 2
## 4: 4 12 9 6 12 9
## 5: NA NA NA NA NA NA
## 6: 13 14 13 7 14 10
## 7: NA NA NA NA NA NA
## 8: 11 15 12 19 9 19
## 9: 46 26 42 20 34 27
## 10: 23 36 16 13 25 13
## 2015-12-30 2015-12-31 2016-01-01 2016-01-02 2016-01-03 2016-01-04
## 1: 13 18 9 16 6 19
## 2: 12 9 7 15 14 14
## 3: 2 2 2 0 3 3
## 4: 9 6 7 7 11 7
## 5: NA NA NA NA NA NA
## 6: 16 14 10 9 8 14
## 7: NA NA NA NA NA NA
## 8: 17 32 156 84 61 33
## 9: 26 46 44 48 33 21
## 10: 19 24 11 18 20 27
## 2016-01-05 2016-01-06 2016-01-07 2016-01-08 2016-01-09 2016-01-10
## 1: 20 19 22 30 14 16
## 2: 11 13 12 12 24 15
## 3: 3 4 4 8 3 5
## 4: 14 9 21 9 10 13
## 5: NA NA NA NA NA NA
## 6: 5 10 11 22 13 9
## 7: NA NA NA NA NA NA
## 8: 18 25 270 108 39 31
## 9: 47 32 19 20 30 37
## 10: 26 21 30 36 36 41
## 2016-01-11 2016-01-12 2016-01-13 2016-01-14 2016-01-15 2016-01-16
## 1: 22 15 15 26 16 13
## 2: 38 18 26 15 12 14
## 3: 8 1 4 0 3 6
## 4: 10 13 16 8 10 7
## 5: NA NA NA NA NA NA
## 6: 10 12 15 14 11 9
## 7: NA NA NA NA NA NA
## 8: 242 45 51 99 35 52
## 9: 45 24 38 31 39 35
## 10: 33 23 21 31 16 24
## 2016-01-17 2016-01-18 2016-01-19 2016-01-20 2016-01-21 2016-01-22
## 1: 27 18 13 32 31 16
## 2: 40 19 13 39 19 16
## 3: 3 1 3 3 3 1
## 4: 13 18 8 50 8 33
## 5: NA NA NA NA NA NA
## 6: 12 6 18 11 20 16
## 7: NA NA NA NA NA NA
## 8: 46 45 26 29 33 43
## 9: 32 32 44 50 54 31
## 10: 24 12 16 23 29 14
## 2016-01-23 2016-01-24 2016-01-25 2016-01-26 2016-01-27 2016-01-28
## 1: 38 18 9 14 10 24
## 2: 19 11 76 14 19 26
## 3: 3 8 4 3 2 5
## 4: 6 22 9 84 28 11
## 5: NA NA NA NA NA NA
## 6: 14 15 11 12 17 11
## 7: NA NA NA NA NA NA
## 8: 38 25 23 38 35 27
## 9: 30 24 33 25 24 38
## 10: 16 12 28 23 38 30
## 2016-01-29 2016-01-30 2016-01-31 2016-02-01 2016-02-02 2016-02-03
## 1: 8 15 18 10 23 17
## 2: 19 17 30 17 17 17
## 3: 6 3 6 5 6 7
## 4: 7 14 16 49 71 29
## 5: NA NA NA NA NA NA
## 6: 19 13 13 7 24 8
## 7: NA NA NA NA NA NA
## 8: 40 24 68 22 40 13
## 9: 34 47 31 34 41 42
## 10: 40 17 11 16 14 16
## 2016-02-04 2016-02-05 2016-02-06 2016-02-07 2016-02-08 2016-02-09
## 1: 11 26 14 8 12 9
## 2: 19 11 175 10 5 12
## 3: 3 1 5 1 2 0
## 4: 22 6 34 16 14 9
## 5: NA NA NA NA NA NA
## 6: 6 9 12 5 10 5
## 7: NA NA NA NA NA NA
## 8: 19 43 26 22 13 75
## 9: 39 24 18 28 25 24
## 10: 20 21 18 17 13 12
## 2016-02-10 2016-02-11 2016-02-12 2016-02-13 2016-02-14 2016-02-15
## 1: 11 34 17 29 11 9
## 2: 7 12 14 19 11 19
## 3: 1 4 3 3 9 4
## 4: 12 24 18 8 26 8
## 5: NA NA NA NA NA NA
## 6: 8 2 9 10 10 15
## 7: NA NA NA NA NA NA
## 8: 25 30 39 5 43 24
## 9: 76 46 29 37 92 157
## 10: 20 20 21 17 23 20
## 2016-02-16 2016-02-17 2016-02-18 2016-02-19 2016-02-20 2016-02-21
## 1: 14 21 12 11 13 11
## 2: 17 15 19 15 9 20
## 3: 7 5 10 2 3 3
## 4: 8 13 21 9 10 14
## 5: NA NA NA NA NA NA
## 6: 18 19 45 6 13 40
## 7: NA NA NA NA NA NA
## 8: 41 266 15 15 17 63
## 9: 42 38 37 40 28 68
## 10: 23 29 28 17 19 14
## 2016-02-22 2016-02-23 2016-02-24 2016-02-25 2016-02-26 2016-02-27
## 1: 13 16 13 19 21 14
## 2: 6 11 6 15 20 35
## 3: 4 2 3 5 3 6
## 4: 12 9 10 20 15 26
## 5: NA NA NA NA NA NA
## 6: 14 11 8 5 17 16
## 7: NA NA NA NA NA NA
## 8: 39 35 31 32 26 77
## 9: 41 29 104 28 24 33
## 10: 22 32 25 22 23 30
## 2016-02-28 2016-02-29 2016-03-01 2016-03-02 2016-03-03 2016-03-04
## 1: 11 35 18 42 15 5
## 2: 34 21 17 22 26 16
## 3: 4 5 5 2 1 4
## 4: 24 19 10 12 8 16
## 5: NA NA NA NA NA NA
## 6: 13 22 16 10 11 18
## 7: NA NA NA NA NA NA
## 8: 25 111 22 35 15 28
## 9: 44 39 39 40 31 43
## 10: 21 17 26 16 27 43
## 2016-03-05 2016-03-06 2016-03-07 2016-03-08 2016-03-09 2016-03-10
## 1: 21 56 9 20 17 18
## 2: 16 28 19 17 15 11
## 3: 7 2 2 5 1 0
## 4: 13 8 17 12 34 10
## 5: NA NA NA NA NA NA
## 6: 53 213 20 19 15 4
## 7: NA NA NA NA NA NA
## 8: 21 20 17 10 12 9
## 9: 52 56 55 52 25 18
## 10: 25 30 28 25 42 28
## 2016-03-11 2016-03-12 2016-03-13 2016-03-14 2016-03-15 2016-03-16
## 1: 8 9 17 9 10 14
## 2: 7 15 11 36 16 22
## 3: 3 3 1 2 4 2
## 4: 9 9 15 10 12 8
## 5: NA NA NA NA NA NA
## 6: 8 14 8 11 15 13
## 7: NA NA NA NA NA NA
## 8: 36 21 14 36 8 20
## 9: 18 28 17 31 35 64
## 10: 23 23 33 19 28 31
## 2016-03-17 2016-03-18 2016-03-19 2016-03-20 2016-03-21 2016-03-22
## 1: 17 6 18 13 11 12
## 2: 18 46 17 15 17 12
## 3: 2 3 4 7 1 1
## 4: 11 9 28 17 11 13
## 5: NA NA NA NA NA NA
## 6: 8 11 7 12 22 19
## 7: NA NA NA NA NA NA
## 8: 11 54 8 7 15 53
## 9: 69 24 16 34 28 42
## 10: 21 25 23 32 36 21
## 2016-03-23 2016-03-24 2016-03-25 2016-03-26 2016-03-27 2016-03-28
## 1: 11 8 15 11 20 59
## 2: 17 14 15 14 15 28
## 3: 10 9 5 1 6 7
## 4: 10 10 10 16 12 12
## 5: NA NA NA NA NA NA
## 6: 9 11 11 8 11 14
## 7: NA NA NA NA NA NA
## 8: 14 20 17 20 14 11
## 9: 24 43 22 114 41 23
## 10: 28 17 39 32 27 22
## 2016-03-29 2016-03-30 2016-03-31 2016-04-01 2016-04-02 2016-04-03
## 1: 11 18 17 12 14 13
## 2: 36 23 12 25 18 18
## 3: 4 6 2 4 155 155
## 4: 13 25 25 18 18 23
## 5: NA NA NA NA NA NA
## 6: 17 13 12 14 8 18
## 7: NA NA NA NA NA NA
## 8: 18 15 28 39 19 23
## 9: 28 37 33 38 73 28
## 10: 39 34 30 49 18 14
## 2016-04-04 2016-04-05 2016-04-06 2016-04-07 2016-04-08 2016-04-09
## 1: 9 490 189 102 38 126
## 2: 16 20 17 16 13 15
## 3: 83 48 31 16 6 13
## 4: 27 39 11 16 9 26
## 5: NA NA NA NA NA NA
## 6: 20 28 19 21 21 26
## 7: NA NA NA NA NA NA
## 8: 22 14 13 47 8 12
## 9: 28 25 27 28 36 76
## 10: 14 22 50 24 29 28
## 2016-04-10 2016-04-11 2016-04-12 2016-04-13 2016-04-14 2016-04-15
## 1: 71 21 57 79 17 17
## 2: 19 14 20 37 16 15
## 3: 8 8 5 7 3 4
## 4: 14 15 10 23 17 74
## 5: NA NA NA NA NA NA
## 6: 17 27 22 20 33 21
## 7: NA NA NA NA NA NA
## 8: 10 8 22 16 12 19
## 9: 35 66 61 68 32 48
## 10: 22 37 33 35 28 27
## 2016-04-16 2016-04-17 2016-04-18 2016-04-19 2016-04-20 2016-04-21
## 1: 23 16 23 18 22 44
## 2: 11 42 10 14 61 39
## 3: 6 7 10 9 7 8
## 4: 114 8 15 15 15 12
## 5: NA 38 159 9 4 1
## 6: 17 14 8 8 10 14
## 7: NA NA NA NA NA NA
## 8: 11 46 15 16 30 17
## 9: 27 39 57 43 31 37
## 10: 25 20 36 23 28 28
## 2016-04-22 2016-04-23 2016-04-24 2016-04-25 2016-04-26 2016-04-27
## 1: 6 31 17 25 40 19
## 2: 17 17 41 35 16 9
## 3: 4 6 5 2 7 3
## 4: 14 14 23 21 11 19
## 5: 10 9 2 0 5 0
## 6: 28 23 26 11 28 6
## 7: NA NA NA NA NA NA
## 8: 18 32 17 141 150 13
## 9: 27 26 22 62 25 27
## 10: 27 19 27 22 33 21
## 2016-04-28 2016-04-29 2016-04-30 2016-05-01 2016-05-02 2016-05-03
## 1: 15 15 29 18 16 13
## 2: 64 22 22 66 33 30
## 3: 7 6 3 1 6 2
## 4: 9 10 11 14 9 5
## 5: 3 55 234 57 5 4
## 6: 18 18 19 11 11 18
## 7: NA NA NA NA NA NA
## 8: 107 30 23 12 17 28
## 9: 25 15 25 40 26 26
## 10: 42 15 19 25 11 20
## 2016-05-04 2016-05-05 2016-05-06 2016-05-07 2016-05-08 2016-05-09
## 1: 20 22 19 11 50 22
## 2: 16 18 45 17 88 23
## 3: 1 3 8 3 5 4
## 4: 10 20 22 16 9 10
## 5: 4 0 9 9 6 6
## 6: 16 25 19 8 13 14
## 7: NA NA NA NA NA NA
## 8: 24 73 88 17 27 50
## 9: 34 40 107 45 44 35
## 10: 23 22 34 16 28 38
## 2016-05-10 2016-05-11 2016-05-12 2016-05-13 2016-05-14 2016-05-15
## 1: 39 23 21 23 22 16
## 2: 18 12 12 13 13 5
## 3: 7 5 2 5 0 3
## 4: 42 22 7 7 54 7
## 5: 6 10 7 5 4 6
## 6: 12 8 16 13 14 14
## 7: NA NA NA NA NA NA
## 8: 29 51 28 78 24 7
## 9: 42 37 143 36 34 25
## 10: 47 25 29 33 23 19
## 2016-05-16 2016-05-17 2016-05-18 2016-05-19 2016-05-20 2016-05-21
## 1: 19 35 16 12 15 13
## 2: 11 13 11 22 10 13
## 3: 12 4 2 4 6 4
## 4: 9 13 5 10 12 18
## 5: 4 2 6 5 3 3
## 6: 8 9 13 15 20 15
## 7: NA NA NA NA NA NA
## 8: 25 8 24 58 24 36
## 9: 16 22 27 22 32 39
## 10: 35 34 38 35 38 18
## 2016-05-22 2016-05-23 2016-05-24 2016-05-25 2016-05-26 2016-05-27
## 1: 14 10 21 20 19 14
## 2: 17 10 14 18 9 16
## 3: 5 9 4 5 7 1
## 4: 23 23 17 6 14 13
## 5: 2 5 5 8 8 6
## 6: 13 16 8 21 14 22
## 7: NA NA NA NA NA NA
## 8: 37 52 14 19 5 11
## 9: 28 29 33 19 35 21
## 10: 14 32 22 30 23 35
## 2016-05-28 2016-05-29 2016-05-30 2016-05-31 2016-06-01 2016-06-02
## 1: 12 15 17 16 21 27
## 2: 17 6 15 18 10 11
## 3: 5 1 5 4 5 7
## 4: 13 9 11 35 8 12
## 5: 3 7 7 6 6 2
## 6: 5 10 9 15 19 14
## 7: NA NA NA NA NA NA
## 8: 15 27 11 38 23 188
## 9: 46 34 30 26 28 28
## 10: 21 22 33 27 17 26
## 2016-06-03 2016-06-04 2016-06-05 2016-06-06 2016-06-07 2016-06-08
## 1: 13 11 15 14 18 18
## 2: 16 10 12 12 13 9
## 3: 7 5 3 4 1 9
## 4: 15 10 25 9 8 8
## 5: 8 3 7 8 3 4
## 6: 8 34 9 10 11 17
## 7: NA NA NA NA NA NA
## 8: 27 141 52 40 62 12
## 9: 17 34 45 37 23 27
## 10: 20 25 22 38 19 27
## 2016-06-09 2016-06-10 2016-06-11 2016-06-12 2016-06-13 2016-06-14
## 1: 10 11 14 18 14 13
## 2: 16 19 19 11 15 10
## 3: 3 4 6 2 2 1
## 4: 10 14 9 11 303 29
## 5: 5 2 1 1 1 2
## 6: 18 27 15 16 28 13
## 7: NA NA NA NA NA NA
## 8: 13 148 133 26 9 17
## 9: 71 203 47 32 44 45
## 10: 31 29 15 31 26 25
## 2016-06-15 2016-06-16 2016-06-17 2016-06-18 2016-06-19 2016-06-20
## 1: 17 15 14 234 8 62
## 2: 20 25 9 14 10 14
## 3: 16 6 3 3 6 1
## 4: 121 69 39 25 27 54
## 5: 8 6 1 0 4 2
## 6: 31 24 19 8 9 12
## 7: NA NA NA NA NA NA
## 8: 42 50 45 33 34 27
## 9: 41 29 37 84 58 33
## 10: 38 22 21 27 25 38
## 2016-06-21 2016-06-22 2016-06-23 2016-06-24 2016-06-25 2016-06-26
## 1: 26 22 8 22 15 69
## 2: 18 25 13 24 14 13
## 3: 6 1 4 3 5 1
## 4: 39 24 22 20 14 12
## 5: 6 2 2 2 1 5
## 6: 19 11 11 23 17 13
## 7: NA NA NA NA NA NA
## 8: 19 14 12 16 94 61
## 9: 59 52 47 31 34 42
## 10: 35 34 24 29 35 27
## 2016-06-27 2016-06-28 2016-06-29 2016-06-30 2016-07-01 2016-07-02
## 1: 11 18 23 12 20 17
## 2: 14 24 16 15 13 11
## 3: 6 5 1 4 5 4
## 4: 8 17 11 15 19 20
## 5: 2 2 2 3 10 1
## 6: 12 12 16 9 4 7
## 7: NA NA NA 61 5 18
## 8: 23 37 15 35 33 128
## 9: 28 40 40 35 33 108
## 10: 21 31 21 30 26 21
## 2016-07-03 2016-07-04 2016-07-05 2016-07-06 2016-07-07 2016-07-08
## 1: 15 16 18 21 15 30
## 2: 12 28 28 17 27 48
## 3: 2 4 3 4 2 0
## 4: 11 36 19 35 22 14
## 5: 3 4 2 3 4 1
## 6: 14 8 9 9 17 22
## 7: 26 3 10 5 2 16
## 8: 11 26 21 44 27 16
## 9: 27 50 43 56 35 35
## 10: 21 29 35 27 22 19
## 2016-07-09 2016-07-10 2016-07-11 2016-07-12 2016-07-13 2016-07-14
## 1: 115 56 45 17 18 15
## 2: 184 64 24 92 31 34
## 3: 1 3 12 4 7 5
## 4: 17 15 12 34 20 25
## 5: 1 9 0 1 6 2
## 6: 11 20 11 15 10 15
## 7: 40 57 86 8 9 9
## 8: 14 37 75 39 42 34
## 9: 29 33 37 70 48 34
## 10: 22 37 30 22 41 27
## 2016-07-15 2016-07-16 2016-07-17 2016-07-18 2016-07-19 2016-07-20
## 1: 18 14 15 15 24 22
## 2: 49 21 36 32 16 16
## 3: 6 6 6 3 3 3
## 4: 15 18 19 13 17 16
## 5: 5 2 2 3 2 11
## 6: 7 15 12 14 8 11
## 7: 3 22 38 18 10 18
## 8: 63 68 114 194 229 153
## 9: 48 28 35 27 48 40
## 10: 23 27 19 37 28 36
## 2016-07-21 2016-07-22 2016-07-23 2016-07-24 2016-07-25 2016-07-26
## 1: 18 30 12 13 18 17
## 2: 19 22 22 19 18 18
## 3: 5 5 2 11 6 2
## 4: 11 22 43 8 13 16
## 5: 1 4 4 2 10 5
## 6: 14 10 26 28 19 16
## 7: 14 9 11 12 6 10
## 8: 44 192 55 54 102 81
## 9: 37 41 26 42 24 25
## 10: 37 35 35 29 33 39
## 2016-07-27 2016-07-28 2016-07-29 2016-07-30 2016-07-31 2016-08-01
## 1: 31 26 29 12 19 19
## 2: 17 35 49 19 25 24
## 3: 2 3 7 5 4 5
## 4: 8 19 14 9 13 13
## 5: 3 10 2 5 7 2
## 6: 12 7 20 13 10 18
## 7: 59 15 9 86 10 121
## 8: 91 76 37 52 136 9
## 9: 42 56 28 41 54 58
## 10: 31 28 33 32 27 16
## 2016-08-02 2016-08-03 2016-08-04 2016-08-05 2016-08-06 2016-08-07
## 1: 57 17 20 49 10 19
## 2: 39 19 29 30 16 54
## 3: 3 3 9 7 2 1
## 4: 16 10 10 11 17 32
## 5: 5 8 2 5 1 1
## 6: 12 26 12 12 13 14
## 7: 26 12 8 11 76 50
## 8: 88 113 123 176 520 123
## 9: 40 31 34 21 47 29
## 10: 28 24 30 30 17 30
## 2016-08-08 2016-08-09 2016-08-10 2016-08-11 2016-08-12 2016-08-13
## 1: 26 41 23 30 55 17
## 2: 15 39 19 17 60 12
## 3: 5 6 7 13 3 5
## 4: 21 16 23 15 55 17
## 5: 2 6 6 2 1 3
## 6: 29 15 18 48 14 21
## 7: 23 7 11 14 11 54
## 8: 93 43 37 137 35 64
## 9: 44 65 28 35 22 37
## 10: 20 27 26 30 27 31
## 2016-08-14 2016-08-15 2016-08-16 2016-08-17 2016-08-18 2016-08-19
## 1: 24 14 12 49 42 37
## 2: 77 63 12 9 34 30
## 3: 6 2 4 1 2 7
## 4: 17 15 7 13 11 11
## 5: 2 3 4 3 2 0
## 6: 14 20 8 10 21 19
## 7: 30 37 6 9 19 15
## 8: 59 87 84 67 138 156
## 9: 43 34 18 24 33 27
## 10: 19 21 19 30 18 20
## 2016-08-20 2016-08-21 2016-08-22 2016-08-23 2016-08-24 2016-08-25
## 1: 13 30 20 33 20 14
## 2: 13 20 29 10 14 23
## 3: 2 2 4 4 2 5
## 4: 8 22 5 7 18 9
## 5: 13 4 2 4 3 3
## 6: 7 10 18 11 18 15
## 7: 23 97 11 26 9 10
## 8: 62 200 73 110 135 145
## 9: 38 18 27 28 60 31
## 10: 19 25 47 28 32 32
## 2016-08-26 2016-08-27 2016-08-28 2016-08-29 2016-08-30 2016-08-31
## 1: 40 15 18 26 8 25
## 2: 15 12 25 22 144 31
## 3: 3 2 3 5 4 2
## 4: 13 27 15 19 7 9
## 5: 1 3 5 2 3 2
## 6: 12 10 8 12 15 9
## 7: 21 30 38 8 11 14
## 8: 151 87 66 92 85 77
## 9: 40 30 36 35 21 27
## 10: 21 37 17 26 26 23
## 2016-09-01 2016-09-02 2016-09-03 2016-09-04 2016-09-05 2016-09-06
## 1: 21 20 25 19 23 18
## 2: 31 17 66 78 19 44
## 3: 5 7 5 2 7 6
## 4: 14 14 9 16 11 7
## 5: 4 3 39 4 3 1
## 6: 19 19 16 13 17 16
## 7: 11 32 54 65 22 28
## 8: 87 91 23 48 87 37
## 9: 26 29 22 35 22 31
## 10: 32 23 14 22 24 28
## 2016-09-07 2016-09-08 2016-09-09 2016-09-10 2016-09-11 2016-09-12
## 1: 19 18 55 16 65 11
## 2: 43 35 13 13 25 15
## 3: 11 10 5 19 7 11
## 4: 14 13 11 9 9 9
## 5: 5 5 5 5 8 15
## 6: 17 16 44 16 19 6
## 7: 21 12 12 15 18 4
## 8: 35 53 56 49 35 66
## 9: 24 34 15 47 45 27
## 10: 21 28 19 25 29 12
## 2016-09-13 2016-09-14 2016-09-15 2016-09-16 2016-09-17 2016-09-18
## 1: 11 13 20 21 13 24
## 2: 37 38 22 28 19 46
## 3: 4 10 3 4 6 3
## 4: 11 15 28 10 24 8
## 5: 13 63 2 2 3 6
## 6: 11 23 30 13 14 17
## 7: 7 10 30 16 45 30
## 8: 44 43 64 193 43 147
## 9: 36 27 29 32 32 33
## 10: 28 31 32 25 18 39
## 2016-09-19 2016-09-20 2016-09-21 2016-09-22 2016-09-23 2016-09-24
## 1: 20 13 32 16 10 13
## 2: 24 22 43 58 26 20
## 3: 4 8 10 3 3 1
## 4: 20 19 12 31 14 9
## 5: 10 2 8 4 3 3
## 6: 15 15 15 18 15 22
## 7: 15 33 12 4 6 18
## 8: 75 66 27 18 120 29
## 9: 30 27 29 34 31 27
## 10: 25 15 27 23 37 28
## 2016-09-25 2016-09-26 2016-09-27 2016-09-28 2016-09-29 2016-09-30
## 1: 44 17 13 72 40 19
## 2: 27 35 20 31 24 24
## 3: 10 5 4 4 3 4
## 4: 40 15 83 60 19 15
## 5: 6 4 1 5 9 1
## 6: 19 18 15 19 15 20
## 7: 10 43 10 52 28 30
## 8: 25 64 65 90 44 70
## 9: 28 25 24 34 34 28
## 10: 23 22 41 23 25 25
## 2016-10-01 2016-10-02 2016-10-03 2016-10-04 2016-10-05 2016-10-06
## 1: 14 13 12 14 10 26
## 2: 94 18 20 18 16 38
## 3: 1 3 6 6 6 3
## 4: 15 12 23 17 20 26
## 5: 6 4 0 4 9 6
## 6: 27 15 59 28 15 18
## 7: 83 46 20 10 11 28
## 8: 29 64 62 33 27 42
## 9: 50 23 47 24 37 19
## 10: 25 30 20 17 25 29
## 2016-10-07 2016-10-08 2016-10-09 2016-10-10 2016-10-11 2016-10-12
## 1: 13 22 14 23 12 8
## 2: 54 29 49 25 72 144
## 3: 5 11 6 3 7 6
## 4: 11 13 9 44 7 18
## 5: 8 13 4 7 6 9
## 6: 16 78 97 35 40 97
## 7: 10 27 7 10 18 12
## 8: 68 20 85 56 58 21
## 9: 19 28 42 40 29 29
## 10: 19 18 19 25 20 32
## 2016-10-13 2016-10-14 2016-10-15 2016-10-16 2016-10-17 2016-10-18
## 1: 50 13 10 16 14 10
## 2: 36 97 179 29 12 21
## 3: 0 2 4 4 3 6
## 4: 4 36 34 10 8 21
## 5: 3 21 6 13 10 2
## 6: 49 24 29 42 30 30
## 7: 12 11 21 40 13 14
## 8: 104 54 78 113 41 48
## 9: 45 24 24 29 49 23
## 10: 31 30 20 22 40 27
## 2016-10-19 2016-10-20 2016-10-21 2016-10-22 2016-10-23 2016-10-24
## 1: 24 10 20 10 26 25
## 2: 42 53 41 19 25 19
## 3: 4 3 4 1 6 5
## 4: 7 6 12 15 9 13
## 5: 3 6 7 10 6 6
## 6: 38 25 18 27 27 32
## 7: 11 8 12 13 8 47
## 8: 31 21 56 52 89 83
## 9: 32 17 26 30 19 35
## 10: 20 23 28 22 25 28
## 2016-10-25 2016-10-26 2016-10-27 2016-10-28 2016-10-29 2016-10-30
## 1: 16 19 20 12 19 50
## 2: 15 21 21 27 33 15
## 3: 5 2 3 3 2 2
## 4: 21 13 10 21 15 103
## 5: 4 173 5 10 10 18
## 6: 27 25 15 15 19 102
## 7: 32 96 9 14 17 42
## 8: 44 80 53 38 102 54
## 9: 44 30 40 23 26 43
## 10: 13 26 17 18 22 21
## 2016-10-31 2016-11-01 2016-11-02 2016-11-03 2016-11-04 2016-11-05
## 1: 16 30 18 25 14 20
## 2: 24 13 11 14 26 11
## 3: 6 1 3 3 3 2
## 4: 22 15 12 11 15 7
## 5: 20 11 5 6 33 13
## 6: 23 26 21 25 53 13
## 7: 17 7 20 12 10 5
## 8: 41 77 73 21 77 107
## 9: 42 19 29 24 72 35
## 10: 27 23 31 20 21 21
## 2016-11-06 2016-11-07 2016-11-08 2016-11-09 2016-11-10 2016-11-11
## 1: 8 67 13 41 10 21
## 2: 21 14 14 54 5 10
## 3: 10 2 2 2 7 3
## 4: 12 13 9 8 21 16
## 5: 10 22 11 8 4 10
## 6: 22 33 19 18 21 24
## 7: 87 26 46 11 9 7
## 8: 185 33 150 42 80 70
## 9: 27 89 52 30 25 60
## 10: 33 39 37 35 22 13
## 2016-11-12 2016-11-13 2016-11-14 2016-11-15 2016-11-16 2016-11-17
## 1: 13 8 15 14 12 6
## 2: 12 11 14 28 23 20
## 3: 6 4 2 4 6 5
## 4: 38 13 14 17 26 14
## 5: 13 11 8 6 10 14
## 6: 22 21 18 30 20 12
## 7: 11 7 8 14 9 8
## 8: 81 59 19 55 48 44
## 9: 25 63 32 44 25 27
## 10: 32 20 20 37 19 27
## 2016-11-18 2016-11-19 2016-11-20 2016-11-21 2016-11-22 2016-11-23
## 1: 11 10 42 21 24 14
## 2: 9 12 11 14 14 15
## 3: 4 4 3 3 9 3
## 4: 10 9 23 15 7 10
## 5: 6 9 6 16 14 13
## 6: 17 12 122 31 16 15
## 7: 3 7 7 8 10 6
## 8: 21 58 141 115 30 74
## 9: 23 34 90 82 30 33
## 10: 22 11 17 14 37 20
## 2016-11-24 2016-11-25 2016-11-26 2016-11-27 2016-11-28 2016-11-29
## 1: 11 204 14 45 33 28
## 2: 15 11 20 13 19 621
## 3: 5 4 0 1 4 5
## 4: 7 10 14 17 11 9
## 5: 15 14 16 9 178 64
## 6: 16 23 30 12 6 17
## 7: 10 9 6 15 7 8
## 8: 11 49 28 62 28 30
## 9: 28 25 30 40 20 65
## 10: 25 28 27 20 50 29
## 2016-11-30 2016-12-01 2016-12-02 2016-12-03 2016-12-04 2016-12-05
## 1: 18 14 47 15 14 18
## 2: 57 17 23 19 21 47
## 3: 8 8 1 1 2 5
## 4: 11 5 10 8 17 13
## 5: 12 10 11 6 8 7
## 6: 17 18 15 19 35 16
## 7: 9 6 6 4 18 11
## 8: 32 59 17 38 76 97
## 9: 45 26 18 74 71 35
## 10: 23 34 34 23 19 29
## 2016-12-06 2016-12-07 2016-12-08 2016-12-09 2016-12-10 2016-12-11
## 1: 20 14 16 14 20 60
## 2: 28 22 22 65 27 17
## 3: 3 3 3 7 3 9
## 4: 23 40 16 17 41 17
## 5: 9 8 5 11 8 4
## 6: 25 13 19 15 25 135
## 7: 12 8 7 15 13 1
## 8: 18 163 39 33 23 16
## 9: 33 22 100 119 26 59
## 10: 26 24 25 32 26 17
## 2016-12-12 2016-12-13 2016-12-14 2016-12-15 2016-12-16 2016-12-17
## 1: 22 15 17 19 18 21
## 2: 17 13 9 18 22 17
## 3: 8 3 210 5 4 6
## 4: 8 9 18 12 12 18
## 5: 15 5 8 8 6 7
## 6: 22 18 12 21 16 52
## 7: 8 15 6 10 16 3
## 8: 102 22 52 39 125 189
## 9: 81 20 31 31 30 59
## 10: 32 22 39 62 33 35
## 2016-12-18 2016-12-19 2016-12-20 2016-12-21 2016-12-22 2016-12-23
## 1: 21 47 65 17 32 63
## 2: 15 22 23 19 17 42
## 3: 2 2 4 3 3 1
## 4: 13 18 23 10 32 10
## 5: 15 4 11 7 48 9
## 6: 24 15 28 17 16 27
## 7: 9 4 7 7 2 7
## 8: 49 55 26 77 64 35
## 9: 111 32 34 31 34 105
## 10: 24 26 33 38 25 17
## 2016-12-24 2016-12-25 2016-12-26 2016-12-27 2016-12-28 2016-12-29
## 1: 15 26 14 20 22 19
## 2: 28 15 9 30 52 45
## 3: 1 7 4 4 6 3
## 4: 26 27 16 11 17 19
## 5: 25 13 3 11 27 13
## 6: 8 17 32 19 23 17
## 7: 33 8 11 4 15 6
## 8: 35 28 20 23 32 39
## 9: 72 36 33 30 36 38
## 10: 22 29 30 29 35 44
## 2016-12-30 2016-12-31
## 1: 18 20
## 2: 26 20
## 3: 4 17
## 4: 10 11
## 5: 36 10
## 6: 17 50
## 7: 8 6
## 8: 32 17
## 9: 31 97
## 10: 26 41
# Print a compact structural summary of the loaded table: its classes,
# its dimensions, and the type and leading values of each column.
utils::str(dt_org)
## Classes 'data.table' and 'data.frame': 145063 obs. of 551 variables:
## $ Page : chr "2NE1_zh.wikipedia.org_all-access_spider" "2PM_zh.wikipedia.org_all-access_spider" "3C_zh.wikipedia.org_all-access_spider" "4minute_zh.wikipedia.org_all-access_spider" ...
## $ 2015-07-01: int 18 11 1 35 NA 12 NA 118 5 6 ...
## $ 2015-07-02: int 11 14 0 13 NA 7 NA 26 23 3 ...
## $ 2015-07-03: int 5 15 1 10 NA 4 NA 30 14 5 ...
## $ 2015-07-04: int 13 18 1 94 NA 5 NA 24 12 12 ...
## $ 2015-07-05: int 14 11 0 4 NA 20 NA 29 9 6 ...
## $ 2015-07-06: int 9 13 4 26 NA 8 NA 127 9 5 ...
## $ 2015-07-07: int 9 22 0 14 NA 5 NA 53 35 4 ...
## $ 2015-07-08: int 22 11 3 9 NA 17 NA 37 15 13 ...
## $ 2015-07-09: int 26 10 4 11 NA 24 NA 20 14 9 ...
## $ 2015-07-10: int 24 4 4 16 NA 7 NA 32 22 15 ...
## $ 2015-07-11: int 19 41 1 16 NA 12 NA 17 8 18 ...
## $ 2015-07-12: int 10 65 1 11 NA 11 NA 23 16 7 ...
## $ 2015-07-13: int 14 57 1 23 NA 7 NA 47 18 8 ...
## $ 2015-07-14: int 15 38 6 145 NA 9 NA 33 12 12 ...
## $ 2015-07-15: int 8 20 8 14 NA 6 NA 47 14 25 ...
## $ 2015-07-16: int 16 62 6 17 NA 10 NA 58 14 23 ...
## $ 2015-07-17: int 8 44 4 85 NA 8 NA 29 7 6 ...
## $ 2015-07-18: int 8 15 5 4 NA 13 NA 187 7 10 ...
## $ 2015-07-19: int 16 10 1 30 NA 3 NA 128 20 7 ...
## $ 2015-07-20: int 7 47 2 22 NA 14 NA 34 82 3 ...
## $ 2015-07-21: int 11 24 3 9 NA 4 NA 38 8 17 ...
## $ 2015-07-22: int 10 17 8 10 NA 9 NA 8 17 10 ...
## $ 2015-07-23: int 20 22 8 11 NA 14 NA 38 18 16 ...
## $ 2015-07-24: int 18 9 6 7 NA 10 NA 17 15 10 ...
## $ 2015-07-25: int 15 39 6 7 NA 8 NA 45 23 7 ...
## $ 2015-07-26: int 14 13 2 11 NA 3 NA 14 11 6 ...
## $ 2015-07-27: int 49 11 2 9 NA 74 NA 15 20 15 ...
## $ 2015-07-28: int 10 12 3 11 NA 17 NA 56 35 12 ...
## $ 2015-07-29: int 16 21 2 44 NA 8 NA 30 20 32 ...
## $ 2015-07-30: int 18 19 4 8 NA 6 NA 15 11 7 ...
## $ 2015-07-31: int 8 9 3 14 NA 9 NA 115 13 10 ...
## $ 2015-08-01: int 5 15 3 19 NA 3 NA 6 11 5 ...
## $ 2015-08-02: int 9 33 5 10 NA 10 NA 25 13 13 ...
## $ 2015-08-03: int 7 8 3 17 NA 21 NA 10 20 22 ...
## $ 2015-08-04: int 13 8 5 17 NA 9 NA 135 47 15 ...
## $ 2015-08-05: int 9 7 4 10 NA 5 NA 40 25 12 ...
## $ 2015-08-06: int 7 13 2 7 NA 3 NA 63 93 9 ...
## $ 2015-08-07: int 4 2 5 10 NA 4 NA 32 13 3 ...
## $ 2015-08-08: int 11 23 1 1 NA 1 NA 35 18 12 ...
## $ 2015-08-09: int 10 12 4 8 NA 7 NA 65 24 6 ...
## $ 2015-08-10: int 5 27 5 27 NA 3 NA 14 8 4 ...
## $ 2015-08-11: int 9 27 0 19 NA 15 NA 21 13 6 ...
## $ 2015-08-12: int 9 36 0 16 NA 5 NA 35 6 4 ...
## $ 2015-08-13: int 9 23 7 2 NA 6 NA 50 12 4 ...
## $ 2015-08-14: int 9 58 3 84 NA 9 NA 49 5 16 ...
## $ 2015-08-15: int 13 80 5 22 NA 4 NA 38 44 11 ...
## $ 2015-08-16: int 4 60 1 14 NA 6 NA 12 15 8 ...
## $ 2015-08-17: int 15 69 6 47 NA 7 NA 37 13 5 ...
## $ 2015-08-18: int 25 42 2 25 NA 9 NA 87 19 5 ...
## $ 2015-08-19: int 9 161 5 14 NA 11 NA 66 12 6 ...
## $ 2015-08-20: int 5 94 0 11 NA 11 NA 90 6 12 ...
## $ 2015-08-21: int 6 77 3 12 NA 2 NA 64 11 9 ...
## $ 2015-08-22: int 20 78 1 27 NA 3 NA 402 12 15 ...
## $ 2015-08-23: int 3 20 0 8 NA 7 NA 86 24 10 ...
## $ 2015-08-24: int 14 24 1 17 NA 23 NA 90 27 10 ...
## $ 2015-08-25: int 46 13 1 43 NA 4 NA 30 60 15 ...
## $ 2015-08-26: int 5 14 2 3 NA 8 NA 86 11 5 ...
## $ 2015-08-27: int 5 26 4 19 NA 8 NA 61 18 7 ...
## $ 2015-08-28: int 13 8 2 14 NA 10 NA 7 10 9 ...
## $ 2015-08-29: int 4 82 1 20 NA 5 NA 44 15 7 ...
## $ 2015-08-30: int 9 22 1 43 NA 8 NA 7 12 9 ...
## $ 2015-08-31: int 10 11 3 4 NA 4 NA 15 37 9 ...
## $ 2015-09-01: int 9 81 4 5 NA 4 NA 11 18 7 ...
## $ 2015-09-02: int 11 37 3 37 NA 8 NA 61 15 12 ...
## $ 2015-09-03: int 11 9 6 23 NA 18 NA 9 7 6 ...
## $ 2015-09-04: int 11 40 6 14 NA 6 NA 23 25 9 ...
## $ 2015-09-05: int 9 47 4 12 NA 4 NA 31 20 9 ...
## $ 2015-09-06: int 15 18 3 13 NA 9 NA 154 14 9 ...
## $ 2015-09-07: int 5 23 3 22 NA 6 NA 11 14 26 ...
## $ 2015-09-08: int 10 6 2 12 NA 8 NA 16 14 2 ...
## $ 2015-09-09: int 7 2 9 12 NA 5 NA 5 13 7 ...
## $ 2015-09-10: int 4 7 7 6 NA 11 NA 6 7 15 ...
## $ 2015-09-11: int 8 16 2 27 NA 5 NA 7 15 10 ...
## $ 2015-09-12: int 9 10 3 5 NA 10 NA 5 12 8 ...
## $ 2015-09-13: int 10 34 1 7 NA 25 NA 4 15 2 ...
## $ 2015-09-14: int 6 14 3 24 NA 29 NA 9 7 17 ...
## $ 2015-09-15: int 13 31 1 8 NA 3 NA 12 15 9 ...
## $ 2015-09-16: int 16 20 6 9 NA 10 NA 31 32 24 ...
## $ 2015-09-17: int 6 23 7 10 NA 9 NA 11 15 23 ...
## $ 2015-09-18: int 24 14 1 12 NA 16 NA 11 23 14 ...
## $ 2015-09-19: int 9 16 2 19 NA 13 NA 10 78 14 ...
## $ 2015-09-20: int 11 34 5 7 NA 7 NA 21 107 13 ...
## $ 2015-09-21: int 12 15 2 7 NA 22 NA 7 25 21 ...
## $ 2015-09-22: int 8 30 3 18 NA 9 NA 9 55 24 ...
## $ 2015-09-23: int 14 13 8 15 NA 9 NA 5 7 19 ...
## $ 2015-09-24: int 6 30 5 7 NA 17 NA 1 41 18 ...
## $ 2015-09-25: int 6 15 0 9 NA 10 NA 68 31 13 ...
## $ 2015-09-26: int 11 25 4 10 NA 24 NA 9 25 14 ...
## $ 2015-09-27: int 14 17 1 9 NA 13 NA 4 22 14 ...
## $ 2015-09-28: int 6 8 5 14 NA 23 NA 10 14 15 ...
## $ 2015-09-29: int 10 12 3 8 NA 12 NA 44 16 10 ...
## $ 2015-09-30: int 20 17 0 17 NA 2 NA 34 22 16 ...
## $ 2015-10-01: int 7 10 1 6 NA 14 NA 59 20 13 ...
## $ 2015-10-02: int 15 21 8 8 NA 13 NA 53 26 13 ...
## $ 2015-10-03: int 8 18 2 7 NA 25 NA 12 21 14 ...
## $ 2015-10-04: int 15 30 1 5 NA 13 NA 71 25 17 ...
## $ 2015-10-05: int 5 13 3 3 NA 12 NA 26 53 11 ...
## $ 2015-10-06: int 8 7 0 9 NA 11 NA 79 22 6 ...
## [list output truncated]
## - attr(*, ".internal.selfref")=<externalptr>
dim(dt_org)
## [1] 145063 551
### Draw a reproducible 10% sample of the rows (one row per Page), then
### reshape to long format: one row per Page/Date pair with its Visit count
set.seed(1234)
sample_wiki <- data.table(
  gather(sample_frac(dt_org, 0.1), Date, Visit, -Page)
)
dim(sample_wiki)
## [1] 7978300 3
### NA values: number of missing entries in each column
# vapply() pins the result to one integer per column, unlike sapply()
vapply(sample_wiki, function(x) sum(is.na(x)), integer(1))
## Page Date Visit
## 0 0 618067
### 0 values: number of entries equal to zero in each column (NAs ignored)
# TRUE is spelled out because `T` is an ordinary, reassignable variable
vapply(sample_wiki, function(x) sum(x == 0, na.rm = TRUE), integer(1))
## Page Date Visit
## 0 0 105813
### '_' in the names: how many of the first 100000 Page values contain
### exactly three underscores
# str_count() is vectorised over its input, so it is applied to the whole
# vector at once instead of element by element through sapply()
sum(str_count(sample_wiki$Page[1:100000], pattern = "_") == 3)
## [1] 37674
### Extract name, project, access, agent from Page
### A Page value has the form <name>_<project>_<access>_<agent>; the name
### itself may contain underscores, so the last three pieces are taken from
### the right and whatever precedes them is the name.
# Split every Page once and reuse the pieces for both the name and the
# trailing fields (the split over all rows is the expensive step).
page_parts <- str_split(sample_wiki$Page, pattern = "_")
# Article name: all pieces except the last three, re-joined with spaces
name <- mclapply(
  page_parts,
  function(x) paste(head(x, length(x) - 3), collapse = " ")
)
# Trailing three pieces: project, access type, agent (in that order)
page_split <- mclapply(page_parts, function(x) tail(x, 3))
add <- data.table(
  Project = vapply(page_split, function(x) x[1], character(1)),
  Access = vapply(page_split, function(x) x[2], character(1)),
  Agent = vapply(page_split, function(x) x[3], character(1)),
  Name = unlist(name)
)
### Adding the new fields to the data set columnwise
sample_wiki <- cbind(sample_wiki, add)
# Show the first rows without the (long) Page column
head(sample_wiki, 10)[, -1]
## Date Visit Project Access Agent
## 1: 2015-07-01 190 zh.wikipedia.org mobile-web all-agents
## 2: 2015-07-01 663 en.wikipedia.org all-access all-agents
## 3: 2015-07-01 41 en.wikipedia.org all-access spider
## 4: 2015-07-01 115 ru.wikipedia.org all-access spider
## 5: 2015-07-01 463 ja.wikipedia.org all-access all-agents
## 6: 2015-07-01 7 www.mediawiki.org all-access spider
## 7: 2015-07-01 147 commons.wikimedia.org desktop all-agents
## 8: 2015-07-01 1704 ja.wikipedia.org desktop all-agents
## 9: 2015-07-01 191 ja.wikipedia.org mobile-web all-agents
## 10: 2015-07-01 4 de.wikipedia.org all-access spider
## Name
## 1: 請回ç”1997
## 2: José Fernández (pitcher)
## 3: Meg Ryan
## 4: ТуÑ\200циÑ\217
## 5: 山本ä¸\200郎 (実æ¥å®¶)
## 6: Beta Features/Hovercards
## 7: Category:Nude standing men
## 8: 2015å¹´
## 9: 寺田農
## 10: Batterie
### Missing visit counts, tallied per project
table(sample_wiki$Project[is.na(sample_wiki$Visit)])
##
## commons.wikimedia.org de.wikipedia.org en.wikipedia.org
## 120252 59063 107047
## es.wikipedia.org fr.wikipedia.org ja.wikipedia.org
## 30533 49271 50189
## ru.wikipedia.org www.mediawiki.org zh.wikipedia.org
## 39624 61790 100298
### Replacing NAs: a missing visit count is treated as zero visits
sample_wiki <- sample_wiki %>%
  replace_na(list(Visit = 0))
head(sample_wiki, n = 10)
## Page
## 1: 請回ç”1997_zh.wikipedia.org_mobile-web_all-agents
## 2: José_Fernández_(pitcher)_en.wikipedia.org_all-access_all-agents
## 3: Meg_Ryan_en.wikipedia.org_all-access_spider
## 4: ТуÑ\200циÑ\217_ru.wikipedia.org_all-access_spider
## 5: 山本ä¸\200郎_(実æ¥å®¶)_ja.wikipedia.org_all-access_all-agents
## 6: Beta_Features/Hovercards_www.mediawiki.org_all-access_spider
## 7: Category:Nude_standing_men_commons.wikimedia.org_desktop_all-agents
## 8: 2015å¹´_ja.wikipedia.org_desktop_all-agents
## 9: 寺田農_ja.wikipedia.org_mobile-web_all-agents
## 10: Batterie_de.wikipedia.org_all-access_spider
## Date Visit Project Access Agent
## 1: 2015-07-01 190 zh.wikipedia.org mobile-web all-agents
## 2: 2015-07-01 663 en.wikipedia.org all-access all-agents
## 3: 2015-07-01 41 en.wikipedia.org all-access spider
## 4: 2015-07-01 115 ru.wikipedia.org all-access spider
## 5: 2015-07-01 463 ja.wikipedia.org all-access all-agents
## 6: 2015-07-01 7 www.mediawiki.org all-access spider
## 7: 2015-07-01 147 commons.wikimedia.org desktop all-agents
## 8: 2015-07-01 1704 ja.wikipedia.org desktop all-agents
## 9: 2015-07-01 191 ja.wikipedia.org mobile-web all-agents
## 10: 2015-07-01 4 de.wikipedia.org all-access spider
## Name
## 1: 請回ç”1997
## 2: José Fernández (pitcher)
## 3: Meg Ryan
## 4: ТуÑ\200циÑ\217
## 5: 山本ä¸\200郎 (実æ¥å®¶)
## 6: Beta Features/Hovercards
## 7: Category:Nude standing men
## 8: 2015å¹´
## 9: 寺田農
## 10: Batterie
### Date format: parse Date from text, then derive the calendar year and
### month plus a visits-in-millions column
sample_wiki <- mutate(
  sample_wiki,
  Date = as.Date(Date, format = "%Y-%m-%d"),
  Year = year(Date),
  Month = month(Date),
  Visit_m = Visit / 1e6
)
head(sample_wiki, n = 10)
## Page
## 1: 請回ç”1997_zh.wikipedia.org_mobile-web_all-agents
## 2: José_Fernández_(pitcher)_en.wikipedia.org_all-access_all-agents
## 3: Meg_Ryan_en.wikipedia.org_all-access_spider
## 4: ТуÑ\200циÑ\217_ru.wikipedia.org_all-access_spider
## 5: 山本ä¸\200郎_(実æ¥å®¶)_ja.wikipedia.org_all-access_all-agents
## 6: Beta_Features/Hovercards_www.mediawiki.org_all-access_spider
## 7: Category:Nude_standing_men_commons.wikimedia.org_desktop_all-agents
## 8: 2015å¹´_ja.wikipedia.org_desktop_all-agents
## 9: 寺田農_ja.wikipedia.org_mobile-web_all-agents
## 10: Batterie_de.wikipedia.org_all-access_spider
## Date Visit Project Access Agent
## 1: 2015-07-01 190 zh.wikipedia.org mobile-web all-agents
## 2: 2015-07-01 663 en.wikipedia.org all-access all-agents
## 3: 2015-07-01 41 en.wikipedia.org all-access spider
## 4: 2015-07-01 115 ru.wikipedia.org all-access spider
## 5: 2015-07-01 463 ja.wikipedia.org all-access all-agents
## 6: 2015-07-01 7 www.mediawiki.org all-access spider
## 7: 2015-07-01 147 commons.wikimedia.org desktop all-agents
## 8: 2015-07-01 1704 ja.wikipedia.org desktop all-agents
## 9: 2015-07-01 191 ja.wikipedia.org mobile-web all-agents
## 10: 2015-07-01 4 de.wikipedia.org all-access spider
## Name Year Month Visit_m
## 1: 請回ç”1997 2015 7 0.000190
## 2: José Fernández (pitcher) 2015 7 0.000663
## 3: Meg Ryan 2015 7 0.000041
## 4: ТуÑ\200циÑ\217 2015 7 0.000115
## 5: 山本ä¸\200郎 (実æ¥å®¶) 2015 7 0.000463
## 6: Beta Features/Hovercards 2015 7 0.000007
## 7: Category:Nude standing men 2015 7 0.000147
## 8: 2015å¹´ 2015 7 0.001704
## 9: 寺田農 2015 7 0.000191
## 10: Batterie 2015 7 0.000004
### Statistics
# Per-column summary of the long-format data set, including the derived
# Year, Month and Visit_m columns
summary(sample_wiki)
## Page Date Visit Project
## Length:7978300 Min. :2015-07-01 Min. : 0 Length:7978300
## Class :character 1st Qu.:2015-11-15 1st Qu.: 10 Class :character
## Mode :character Median :2016-03-31 Median : 110 Mode :character
## Mean :2016-03-31 Mean : 950
## 3rd Qu.:2016-08-16 3rd Qu.: 593
## Max. :2016-12-31 Max. :17332270
## Access Agent Name Year
## Length:7978300 Length:7978300 Length:7978300 Min. :2015
## Class :character Class :character Class :character 1st Qu.:2015
## Mode :character Mode :character Mode :character Median :2016
## Mean :2016
## 3rd Qu.:2016
## Max. :2016
## Month Visit_m
## Min. : 1.000 Min. : 0.000000
## 1st Qu.: 5.000 1st Qu.: 0.000010
## Median : 8.000 Median : 0.000110
## Mean : 7.511 Mean : 0.000950
## 3rd Qu.:10.000 3rd Qu.: 0.000593
## Max. :12.000 Max. :17.332270
### plots
### Visualization
#### Total visits per day over every page in the sample, regardless of
#### Project, Agent or Access type.
# DATE
p_base <- ggplot(
  sample_wiki %>%
    group_by(Date) %>%
    summarise(Visit_m = sum(Visit_m)),
  aes(Date, Visit_m)
) +
  geom_line() +
  theme_classic(base_size = 12, base_family = "mono") +
  labs(y = "Visit in Millions", title = "Daily Traffic")
ggplotly(p_base)
# MONTH
# Mean of Visit for every year-month and Project, drawn as bars filled by
# Project
p_month <- ggplot(
  sample_wiki %>%
    mutate(year_month = format(Date, "%Y-%m")) %>%
    group_by(year_month, Project) %>%
    summarise(Visit = mean(Visit)),
  aes(year_month, Visit)
) +
  geom_bar(aes(fill = Project), stat = "identity") +
  theme_classic(base_size = 12, base_family = "mono") +
  labs(
    x = "Year - Month",
    y = "Number of Visits",
    title = "Average Monthly Traffic"
  )
## `summarise()` has grouped output by 'year_month'. You can override using the `.groups` argument.
ggplotly(p_month)
# PROJECT WISE
# Daily visits (in millions) summed per Project, one coloured line each
p_proj <- ggplot(
  sample_wiki %>%
    group_by(Date, Project) %>%
    summarise(Visit_m = sum(Visit_m)),
  aes(Date, Visit_m)
) +
  geom_line(aes(color = Project), size = 0.3) +
  # facet_grid(~Project,scales = 'free_y',shrink = F) +
  theme_classic(base_size = 12, base_family = "mono") +
  theme(legend.position = "top") +
  labs(y = "Visit in Millions")
## `summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
ggplotly(p_proj)
## ACCESS
# Daily visits (in millions) summed per Access type, one coloured line each
p_access <- ggplot(
  sample_wiki %>%
    group_by(Date, Access) %>%
    summarise(Visit_m = sum(Visit_m)),
  aes(Date, Visit_m)
) +
  geom_line(aes(color = Access)) +
  theme_classic(base_size = 12, base_family = "mono") +
  labs(y = "Visit in Millions")
## `summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
ggplotly(p_access)
### AGENTS
# Daily visits (in millions) summed per Agent, one coloured line each
p_agent <- ggplot(
  sample_wiki %>%
    group_by(Date, Agent) %>%
    summarise(Visit_m = sum(Visit_m)),
  aes(Date, Visit_m)
) +
  geom_line(aes(color = Agent)) +
  theme_classic(base_size = 12, base_family = "mono") +
  labs(y = "Visit in Millions")
## `summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
ggplotly(p_agent)
### Most-visited page (largest total Visit) within each project
# Totals are computed per Project/Name; summarise() leaves the result grouped
# by Project, so top_n() picks one winner per project.
top_1_proj <- data.table(
  sample_wiki %>%
    group_by(Project, Name) %>%
    summarise(Visit = sum(Visit)) %>%
    top_n(n = 1, wt = Visit)
)
## `summarise()` has grouped output by 'Project'. You can override using the `.groups` argument.
top_1_proj
## Project Name Visit
## 1: commons.wikimedia.org Special:UploadWizard 17013799
## 2: de.wikipedia.org Hauptseite 26411562
## 3: en.wikipedia.org Prince (musician) 38882618
## 4: es.wikipedia.org Arroba (símbolo) 7221526
## 5: fr.wikipedia.org Wikipédia:Accueil principal 239589012
## 6: ja.wikipedia.org あさが来た 9191689
## 7: ru.wikipedia.org Заглавная страница 742880016
## 8: www.mediawiki.org Special:MyLanguage/How to contribute 2873720
## 9: zh.wikipedia.org Wikipedia:首页 55567948
### Top page per project for each year
# Totals are computed per Project/Year/Name; the top page is taken within
# each Project/Year group and the years are then spread into columns.
top_1_proj_yr <- data.table(
  sample_wiki %>%
    group_by(Project, Year, Name) %>%
    summarise(Visit = sum(Visit)) %>%
    top_n(n = 1, wt = Visit) %>%
    spread(key = Year, value = Visit)
)
## `summarise()` has grouped output by 'Project', 'Year'. You can override using the `.groups` argument.
top_1_proj_yr
## Project Name 2015
## 1: commons.wikimedia.org Special:UploadWizard 6915513
## 2: de.wikipedia.org Hauptseite 9645736
## 3: en.wikipedia.org Prince (musician) NA
## 4: en.wikipedia.org Star Wars 18000155
## 5: es.wikipedia.org Arroba (símbolo) NA
## 6: es.wikipedia.org Star Wars 2728925
## 7: fr.wikipedia.org Wikipédia:Accueil principal 55975153
## 8: ja.wikipedia.org あさが来た 4475086
## 9: ru.wikipedia.org Заглавная страница 138231149
## 10: www.mediawiki.org How to contribute 1108015
## 11: www.mediawiki.org Special:CreateAccount NA
## 12: zh.wikipedia.org Wikipedia:首页 16844423
## 2016
## 1: 10098286
## 2: 16765826
## 3: 36357931
## 4: NA
## 5: 5102560
## 6: NA
## 7: 183613859
## 8: 4716603
## 9: 604648867
## 10: NA
## 11: 2314512
## 12: 38723525
### YEARS 2015 AND 2016
# Total visits per Project/Year/Name.
# Note: the name `sample` masks base::sample() for the rest of the script.
sample <- data.table(
  sample_wiki %>%
    group_by(Project, Year, Name) %>%
    summarise(Visit = sum(Visit))
)
## `summarise()` has grouped output by 'Project', 'Year'. You can override using the `.groups` argument.
# Keep rows whose Project matches 'en' and whose Name does not match
# 'Special:', then split the result by year
wiki <- sample[grepl(pattern = 'en', x = Project) & !grepl(pattern = 'Special:', x = Name)]
wiki_15 <- wiki[Year == 2015]
wiki_16 <- wiki[Year == 2016]
### top in 2015 # time trend by the top phrases
# Names of the ten pages with the largest 2015 totals in `wiki_15`
top_10_en_15 <- top_n(wiki_15, 10, Visit) %>% select(Name)
# The rows are restricted with the same Project test that built `wiki`
# (grepl('en', Project)). Matching on Name alone also pulls in same-titled
# pages from other projects (e.g. 'Star Wars' exists on es.wikipedia.org too)
# and stacks their visits into the bars of the English page.
sample_wiki %>%
  filter(
    grepl("en", Project),
    Name %in% top_10_en_15$Name,
    Year == 2015
  ) %>%
  ggplot() +
  geom_bar(
    aes(x = Date, y = Visit_m),
    stat = "identity", fill = "red", alpha = 0.7
  ) +
  facet_wrap(~Name, scales = "fixed", nrow = 5) +
  theme_classic(base_size = 12, base_family = "mono") +
  ylab("Visit in Millions") +
  ggtitle("Top 10 Visited Pages in 2015")
### TOP IN 2016
# Names of the ten pages with the largest 2016 totals in `wiki_16`
top_10_en_16 <- top_n(wiki_16, 10, Visit) %>% select(Name)
# time trend by the top phrases
# The rows are restricted with the same Project test that built `wiki`
# (grepl('en', Project)); matching on Name alone would also stack the visits
# of same-titled pages from other projects into these bars.
sample_wiki %>%
  filter(
    grepl("en", Project),
    Name %in% top_10_en_16$Name,
    Year == 2016
  ) %>%
  ggplot() +
  geom_bar(
    aes(x = Date, y = Visit_m),
    fill = "red", alpha = 0.7, stat = "identity"
  ) +
  facet_wrap(~Name, scales = "free_y", nrow = 5) +
  theme_classic(base_size = 12, base_family = "mono") +
  ylab("Visit in Millions") +
  ggtitle("Top 10 Visited Pages in 2016")
## Loading required package: TSA
## Warning: package 'TSA' was built under R version 4.1.2
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
# Load the full training table and pull out one page's daily series
train <- fread(
  "train_1.csv",
  header = TRUE,
  data.table = TRUE,
  na.strings = c("NA", "?", "")
)
# Every column except the first (Page) holds one day's count; flatten the
# selected row to a vector and pass it through tsclean()
x <- tsclean(
  unlist(train[Page == "Eminem_en.wikipedia.org_desktop_all-agents", -1])
)
# Partial and full autocorrelation plots, then the suggested number of
# differences
pacf(x)
acf(x)
ndiffs(x)
## [1] 1
# Periodogram of the series; list the two periods with the largest spectrum
p <- periodogram(x)
data.table(period = 1 / p$freq, spec = p$spec)[order(-spec)[1:2]]
## period spec
## 1: 576 368918852
## 2: 192 207858655
dim(x)
## NULL
# Train set: the first 490 observations, as a ts object
y <- ts(x[seq_len(490)])
# Test set: observations 491 to 550 (the 60 held-out steps)
y.te <- x[seq(491, 550)]
# Base model: automatic ARIMA without external regressors. It seeds `bestfit`
# (i = j = 0) as the AICc benchmark for the Fourier candidates tried later.
fit0 <- auto.arima(y)
bestfit <- list(aicc = fit0$aicc, i = 0, j = 0, fit = fit0)
bestfit
## $aicc
## [1] 8011.171
##
## $i
## [1] 0
##
## $j
## [1] 0
##
## $fit
## Series: y
## ARIMA(5,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## -0.0133 -0.3526 -0.1953 -0.2436 -0.2276 -0.2751
## s.e. 0.1078 0.0477 0.0583 0.0476 0.0545 0.1055
##
## sigma^2 estimated as 748828: log likelihood=-3998.47
## AIC=8010.94 AICc=8011.17 BIC=8040.28
# 60-step forecast from the base model
fc0 <- forecast(fit0, h = 60)
plot(fc0)
# Choose the best model by AICc
# Candidates are non-seasonal ARIMA fits with Fourier regressors for either
# period 576 (K = i harmonics) or period 192 (K = j harmonics). For every
# (i, j) pair the better of the two candidates is compared with the current
# best, which starts out as the base model.
# Each candidate depends on i only or on j only, so it is fitted once up
# front instead of once per (i, j) pair.
ks <- 1:3
fits_576 <- lapply(ks, function(k) {
  auto.arima(y, xreg = fourier(ts(y, frequency = 576), K = k), seasonal = FALSE)
})
fits_192 <- lapply(ks, function(k) {
  auto.arima(y, xreg = fourier(ts(y, frequency = 192), K = k), seasonal = FALSE)
})
for (i in ks) {
  for (j in ks) {
    fit1 <- fits_576[[i]]
    fit2 <- fits_192[[j]]
    # The original else-branch assigned to a misspelled `fir3`, so the
    # period-192 candidate could never be selected.
    fit3 <- if (fit1$aicc < fit2$aicc) fit1 else fit2
    if (fit3$aicc < bestfit$aicc) {
      bestfit <- list(aicc = fit3$aicc, i = i, j = j, fit = fit3)
    }
  }
}
# NOTE(review): a winning Fourier model carries only ONE of the two regressor
# sets; any forecast from it must supply regressors with exactly those columns.
bestfit
## $aicc
## [1] 8011.171
##
## $i
## [1] 0
##
## $j
## [1] 0
##
## $fit
## Series: y
## ARIMA(5,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## -0.0133 -0.3526 -0.1953 -0.2436 -0.2276 -0.2751
## s.e. 0.1078 0.0477 0.0583 0.0476 0.0545 0.1055
##
## sigma^2 estimated as 748828: log likelihood=-3998.47
## AIC=8010.94 AICc=8011.17 BIC=8040.28
# Forecast 60 steps ahead from the selected model.
# The model may have been fitted without Fourier regressors (the base model
# won) or with only one of the two Fourier sets, so the future regressors are
# matched to the columns stored in the fit. A "drift" column, if present, is
# excluded here -- presumably forecast() supplies it itself; TODO confirm.
fourier_cols <- setdiff(colnames(bestfit$fit$xreg), "drift")
if (length(fourier_cols) == 0) {
  # The horizon has to be given explicitly: regressors passed to a model that
  # does not use them are ignored with a warning and the horizon is then not
  # 60 (in the recorded run the same ARIMA(5,1,1) scored differently as fc
  # and as fc0).
  fc <- forecast(bestfit$fit, h = 60)
} else {
  future_terms <- cbind(
    fourier(ts(y, frequency = 576), K = bestfit$i, h = 60),
    fourier(ts(y, frequency = 192), K = bestfit$j, h = 60)
  )
  fc <- forecast(
    bestfit$fit,
    xreg = future_terms[, fourier_cols, drop = FALSE]
  )
}
## Warning in forecast.forecast_ARIMA(bestfit$fit, xreg = cbind(fourier(ts(y, :
## xreg not required by this model, ignoring the provided regressors
# Plot the forecast obtained from the selected model
plot(fc)
# TBATS alternative: fit on the training series y with seasonal periods 576
# and 192 (the two periods listed from the periodogram) and forecast 60 steps
fc.tbats <- forecast(tbats(y, seasonal.periods=c(576,192)), h=60)
plot(fc.tbats)
# Mean absolute percentage error of a forecast, in percent.
#   act: numeric vector of observed values
#   fc:  object with a `mean` element holding the point forecasts
#        (e.g. the value returned by forecast())
# Returns a single number: mean(|act - pred| / |act|) * 100.
# A length mismatch is reported with a warning, because R would otherwise
# silently recycle the shorter vector and return a misleading score.
mape <- function(act, fc){
  pred <- as.vector(fc$mean)
  if (length(pred) != length(act)) {
    warning(
      "mape(): ", length(act), " actual values but ", length(pred),
      " forecasts; the shorter vector is recycled",
      call. = FALSE
    )
  }
  mean(abs((act - pred) / abs(act))) * 100
}
# Hold-out MAPE (%) of each forecast against the test observations y.te
mape(y.te, fc0)
## [1] 14.30624
# NOTE(review): in the recorded run fc and fc0 come from the same ARIMA(5,1,1)
# fit (see the printed bestfit), yet their scores differ; confirm that both
# forecasts span the same 60 steps, e.g. with length(fc$mean).
mape(y.te, fc)
## [1] 14.49922
mape(y.te, fc.tbats)
## [1] 13.19018
# Full cleaned series, followed by the three forecasts for visual comparison
plot(as.ts(x))
plot(fc0)
plot(fc)
plot(fc.tbats)